library(ggplot2)
library(reshape2)
library(scales)

setwd("/Users/ab6415/Documents/Work_laptop_stuff/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_FigS3_FigS4_precursors_analysis/length/")

piRNA_counts_wholeanimal_E1 <- list.files(path="../counts/wholeworm/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1<-paste0("../counts/wholeworm/",piRNA_counts_wholeanimal_E1,sep="")
piRNA_counts_wholeanimal_E1_longreads <- list.files(path="../counts/wholeworm_longreads/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1_longreads<-paste0("../counts/wholeworm_longreads/",piRNA_counts_wholeanimal_E1_longreads,sep="")
piRNA_counts_germnuclei_total <- list.files(path="../counts/germnuclei_total/",
    pattern="*pis"); piRNA_counts_germnuclei_total<-paste0("../counts/germnuclei_total/",piRNA_counts_germnuclei_total,sep="")
piRNA_counts_germnuclei_fractionation <- list.files(path="../counts/germnuclei_fractionation_rep1/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation<-paste0("../counts/germnuclei_fractionation_rep1/",piRNA_counts_germnuclei_fractionation,sep="")
piRNA_counts_germnuclei_fractionation_rep2 <- list.files(path="../counts/germnuclei_fractionation_rep2/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation_rep2<-paste0("../counts/germnuclei_fractionation_rep2/",piRNA_counts_germnuclei_fractionation_rep2,sep="")


piRNA_counts_filenames<-c(piRNA_counts_wholeanimal_E1,piRNA_counts_wholeanimal_E1_longreads,piRNA_counts_germnuclei_total,piRNA_counts_germnuclei_fractionation,piRNA_counts_germnuclei_fractionation_rep2)
piRNA_counts_filenames<-piRNA_counts_filenames[c(1:13,19:21,14:18,27:30,22,23,25,26)]

names <-c("wholeworm_EV_col1","wholeworm_ints11_col1","wholeworm_EV_col2","wholeworm_ints11_col2","wholeworm_EV_96h","wholeworm_ints11_96h","wholeworm_EV_col1_longreads","wholeworm_ints11_col1_longreads","wholeworm_ints11_col2_longreads","germnuclei_total_N2_EV","germnuclei_total_N2_ints11","germnuclei_total_TFIIS_EV","germnuclei_total_TFIIS_ints11","germnuclei_NP_N2_EV","germnuclei_NP_N2_ints11","germnuclei_NP_TFIIS_EV","germnuclei_NP_TFIIS_ints11","germnuclei_CHR_N2_EV","germnuclei_CHR_N2_ints11","germnuclei_CHR_TFIIS_EV","germnuclei_CHR_TFIIS_ints11","germnuclei_rep2_NP_N2_EV","germnuclei_rep2_NP_N2_ints11","germnuclei_rep2_NP_TFIIS_EV","germnuclei_rep2_NP_TFIIS_ints11","germnuclei_rep2_CHR_N2_EV","germnuclei_rep2_CHR_N2_ints11","germnuclei_rep2_CHR_TFIIS_EV","germnuclei_rep2_CHR_TFIIS_ints11")

piRNA_counts_filenames
##  [1] "../counts/wholeworm/Sample_1_EV_col1.fastq.trimmed.sorted.bed.pis"                                          
##  [2] "../counts/wholeworm/Sample_3_ints_11_col1.fastq.trimmed.sorted.bed.pis"                                     
##  [3] "../counts/wholeworm/Sample_4_EV_col2.fastq.trimmed.sorted.bed.pis"                                          
##  [4] "../counts/wholeworm/Sample_6_ints_11_col2.fastq.trimmed.sorted.bed.pis"                                     
##  [5] "../counts/wholeworm/Sample_7_EV_col2_96h.fastq.trimmed.sorted.bed.pis"                                      
##  [6] "../counts/wholeworm/Sample_9_ints_11_col2_96h.fastq.trimmed.sorted.bed.pis"                                 
##  [7] "../counts/wholeworm_longreads/1_EV_col1_S1_R1_001.fastq.trimmed.sorted.bed.pis"                             
##  [8] "../counts/wholeworm_longreads/3_ints-11_col1_S2_R1_001.fastq.trimmed.sorted.bed.pis"                        
##  [9] "../counts/wholeworm_longreads/9_ints-11_col2_96h_S3_R1_001.fastq.trimmed.sorted.bed.pis"                    
## [10] "../counts/germnuclei_total/Sample_13_N2_EV_CR_S63_L007_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [11] "../counts/germnuclei_total/Sample_14_N2_Ints11_CR_S64_L007_R1_001.fastq.trimmed.sorted.bed.pis"             
## [12] "../counts/germnuclei_total/Sample_15_TFIIS_EV_CR_S65_L007_R1_001.fastq.trimmed.sorted.bed.pis"              
## [13] "../counts/germnuclei_total/Sample_16_TFIIS_Ints11_CR_S66_L007_R1_001.fastq.trimmed.sorted.bed.pis"          
## [14] "../counts/germnuclei_fractionation_rep1/7_N2_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [15] "../counts/germnuclei_fractionation_rep1/8_N2_ints-11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"         
## [16] "../counts/germnuclei_fractionation_rep1/9_TFIIS_EV_np_S9_R1_001.fastq.trimmed.sorted.bed.pis"               
## [17] "../counts/germnuclei_fractionation_rep1/10_TFIIS_ints-11RNAi_np_S10_R1_001.fastq.trimmed.sorted.bed.pis"    
## [18] "../counts/germnuclei_fractionation_rep1/25_N2_EV_chr_S11_R1_001.fastq.trimmed.sorted.bed.pis"               
## [19] "../counts/germnuclei_fractionation_rep1/26_N2_ints-11RNAi_chr_S12_R1_001.fastq.trimmed.sorted.bed.pis"      
## [20] "../counts/germnuclei_fractionation_rep1/27_TFIIS_EV_chr_S13_R1_001.fastq.trimmed.sorted.bed.pis"            
## [21] "../counts/germnuclei_fractionation_rep1/28_TFIIS_ints-11RNAi_chr_S14_R1_001.fastq.trimmed.sorted.bed.pis"   
## [22] "../counts/germnuclei_fractionation_rep2/5_N2_EV_np_S5_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [23] "../counts/germnuclei_fractionation_rep2/6_N2_ints11RNAi_np_S6_R1_001.fastq.trimmed.sorted.bed.pis"          
## [24] "../counts/germnuclei_fractionation_rep2/7_TFIIS_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"               
## [25] "../counts/germnuclei_fractionation_rep2/8_TFIIS_ints11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"       
## [26] "../counts/germnuclei_fractionation_rep2/1_N2_EV_chr_S1_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [27] "../counts/germnuclei_fractionation_rep2/13_N2_ints11RNAi_chr_techrep_S9_R1_001.fastq.trimmed.sorted.bed.pis"
## [28] "../counts/germnuclei_fractionation_rep2/3_TFIIS_EV_chr_S3_R1_001.fastq.trimmed.sorted.bed.pis"              
## [29] "../counts/germnuclei_fractionation_rep2/4_TFIIS_ints11RNAi_chr_S4_R1_001.fastq.trimmed.sorted.bed.pis"
names         
##  [1] "wholeworm_EV_col1"                "wholeworm_ints11_col1"           
##  [3] "wholeworm_EV_col2"                "wholeworm_ints11_col2"           
##  [5] "wholeworm_EV_96h"                 "wholeworm_ints11_96h"            
##  [7] "wholeworm_EV_col1_longreads"      "wholeworm_ints11_col1_longreads" 
##  [9] "wholeworm_ints11_col2_longreads"  "germnuclei_total_N2_EV"          
## [11] "germnuclei_total_N2_ints11"       "germnuclei_total_TFIIS_EV"       
## [13] "germnuclei_total_TFIIS_ints11"    "germnuclei_NP_N2_EV"             
## [15] "germnuclei_NP_N2_ints11"          "germnuclei_NP_TFIIS_EV"          
## [17] "germnuclei_NP_TFIIS_ints11"       "germnuclei_CHR_N2_EV"            
## [19] "germnuclei_CHR_N2_ints11"         "germnuclei_CHR_TFIIS_EV"         
## [21] "germnuclei_CHR_TFIIS_ints11"      "germnuclei_rep2_NP_N2_EV"        
## [23] "germnuclei_rep2_NP_N2_ints11"     "germnuclei_rep2_NP_TFIIS_EV"     
## [25] "germnuclei_rep2_NP_TFIIS_ints11"  "germnuclei_rep2_CHR_N2_EV"       
## [27] "germnuclei_rep2_CHR_N2_ints11"    "germnuclei_rep2_CHR_TFIIS_EV"    
## [29] "germnuclei_rep2_CHR_TFIIS_ints11"
piRNA_counts <- lapply(piRNA_counts_filenames,function(i){
  read.csv(paste(i,sep=""), header=FALSE,sep="\t")
})


piRNA_counts_precfiltered <- lapply(piRNA_counts, function(df){
 df$plus_d <- df$V10-df$V2
 df$minus_d <- df$V3-df$V11
 df_plus<-df[which(df$plus_d==2 & df$V8=="+"),]
 df_minus<-df[which(df$minus_d==(2) & df$V8=="-"),]
 return(rbind(df_plus,df_minus))
})

motifdep_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[c(grep("^21UR-",df$V12),grep("^piRNA_type_1",df$V12)),])})
motifind_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[grep("^piRNA_type_2",df$V12),])})

N2_EV_np<-rbind(motifdep_piRNA_precursors[[14]],motifdep_piRNA_precursors[[22]])
N2_EV_np<-aggregate(N2_EV_np[,"V6"],by=N2_EV_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
N2_ints11_np<-rbind(motifdep_piRNA_precursors[[15]],motifdep_piRNA_precursors[[23]])
N2_ints11_np<-aggregate(N2_ints11_np[,"V6"],by=N2_ints11_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_EV_np<-rbind(motifdep_piRNA_precursors[[16]],motifdep_piRNA_precursors[[24]])
tfiis_EV_np<-aggregate(tfiis_EV_np[,"V6"],by=tfiis_EV_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_ints11_np<-rbind(motifdep_piRNA_precursors[[17]],motifdep_piRNA_precursors[[25]])
tfiis_ints11_np<-aggregate(tfiis_ints11_np[,"V6"],by=tfiis_ints11_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)

N2_EV_chr<-rbind(motifdep_piRNA_precursors[[18]],motifdep_piRNA_precursors[[26]])
N2_EV_chr<-aggregate(N2_EV_chr[,"V6"],by=N2_EV_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
N2_ints11_chr<-rbind(motifdep_piRNA_precursors[[19]],motifdep_piRNA_precursors[[27]])
N2_ints11_chr<-aggregate(N2_ints11_chr[,"V6"],by=N2_ints11_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_EV_chr<-rbind(motifdep_piRNA_precursors[[20]],motifdep_piRNA_precursors[[28]])
tfiis_EV_chr<-aggregate(tfiis_EV_chr[,"V6"],by=tfiis_EV_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_ints11_chr<-rbind(motifdep_piRNA_precursors[[21]],motifdep_piRNA_precursors[[29]])
tfiis_ints11_chr<-aggregate(tfiis_ints11_chr[,"V6"],by=tfiis_ints11_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)

motifdep_piRNA_precursors_agg<-list(N2_EV_np,N2_ints11_np,tfiis_EV_np,tfiis_ints11_np,
                                    N2_EV_chr,N2_ints11_chr,tfiis_EV_chr,tfiis_ints11_chr)

names_agg<-c("N2_EV_np","N2_ints11_np","tfiis_EV_np","tfiis_ints11_np",
             "N2_EV_chr","N2_ints11_chr","tfiis_EV_chr","tfiis_ints11_chr")
build_lenmatrix_numreads<-function(df){
  loci<-as.character(unique(df$V12))
  len_matrix<-matrix(data=0, nrow = length(loci), ncol = 75)
  rownames(len_matrix)<-loci
  colnames(len_matrix)<-1:75
  for (row in seq(nrow(df))){
    locus<-as.character(df[row,"V12"])
    len<-df[row,"V4"]
    numreads<-df[row,"x"]
    len_matrix[locus,len]<-numreads
  }
  return(len_matrix[,18:75])
}

build_lenmatrix_uniqueseqs<-function(df){
  loci<-as.character(unique(df$V12))
  len_matrix<-matrix(data=0, nrow = length(loci), ncol = 75)
  rownames(len_matrix)<-loci
  colnames(len_matrix)<-1:75
  for (row in seq(nrow(df))){
    locus<-as.character(df[row,"V12"])
    len<-df[row,"V4"]
    len_matrix[locus,len]<-1
  }
  return(len_matrix[,18:75])
}



len_matrix_numreads<-lapply(motifdep_piRNA_precursors_agg,FUN=build_lenmatrix_numreads)
len_matrix_uniqueseqs<-lapply(motifdep_piRNA_precursors_agg,FUN=build_lenmatrix_uniqueseqs)

for (i in seq(length(len_matrix_numreads))){

data<-len_matrix_uniqueseqs[[i]]
data<-data[order(apply(data,MARGIN=1,FUN=sum),decreasing = FALSE),]

melted_data<-melt(data)
melted_data$Var1<-factor(melted_data$Var1,levels=rownames(data))

print(ggplot(melted_data, aes(Var2,Var1, fill=value)) + geom_raster() +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  ylab("motif-dependent piRNA loci")+xlab("precursor length")+
    scale_fill_distiller(palette = "Greys")+
  ggtitle(names_agg[i]))
  ggsave(paste(names_agg[i],"_lendist_heatmap.pdf",sep=""))



data<-len_matrix_numreads[[i]]
data<-data[order(apply(data,MARGIN=1,FUN=sum),decreasing = FALSE),]

melted_data<-melt(data)
melted_data$Var1<-factor(melted_data$Var1,levels=rownames(data))

print(ggplot(melted_data, aes(Var2,Var1, fill=value)) + geom_raster() +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  scale_fill_gradientn(colors=c("white","red","blue","black"),values=scales::rescale(c(0,1,5,10,50,1000,100000)))+
  ylab("motif-dependent piRNA loci")+xlab("precursor length")+
  ggtitle(names_agg[i]))

}
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

pausing_strength_data<-read.table("../../Reference_pausing_signal_strength/all_pis_typeI_pausingscores.bed",header=TRUE)
pausing_strength_data<-pausing_strength_data[,c(4,10:12)]
pausing_strength_data$V4<-as.character(pausing_strength_data$V4)


for (i in seq(length(len_matrix_numreads))){

data<-len_matrix_uniqueseqs[[i]]
data<-data[order(apply(data,MARGIN=1,FUN=sum),decreasing = FALSE),]

melted_data<-melt(data)
melted_data$Var1<-as.character(melted_data$Var1)
melted_data<-merge(melted_data,pausing_strength_data,
                   by.x="Var1",by.y="V4")

melted_data$Var1<-factor(melted_data$Var1,levels=rownames(data))

for (bin in unique(melted_data$tm_downstream_bins)){
  melted_data_subset<-melted_data[which(melted_data$tm_downstream_bins==bin),]
  print(ggplot(melted_data_subset, aes(Var2,Var1, fill=value)) + geom_raster() +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  ylab("motif-dependent piRNA loci")+xlab("precursor length")+
    scale_fill_distiller(palette="Greys")+
  ggtitle(paste(names_agg[i],bin,sep="-")))
  
  ggsave(paste(paste(names_agg[i],bin,sep="-"),"_uniqueseqs.pdf",sep=""))
}

# data<-len_matrix_numreads[[i]]
# data<-data[order(apply(data,MARGIN=1,FUN=sum),decreasing = FALSE),]
# 
# melted_data<-melt(data)
# melted_data$Var1<-as.character(melted_data$Var1)
# melted_data<-merge(melted_data,pausing_strength_data,
#                    by.x="Var1",by.y="V4")
# 
# melted_data$Var1<-factor(melted_data$Var1,levels=rownames(data))

# for (bin in unique(melted_data$tm_downstream_bins)){
#   melted_data_subset<-melted_data[which(melted_data$tm_downstream_bins==bin),]
#   print(ggplot(melted_data_subset, aes(Var2,Var1, fill=value)) + geom_raster() +
#   theme(axis.text.y=element_blank(),
#         axis.ticks.y=element_blank())+
#   scale_fill_gradientn(colors=c("white","red","blue","black"),values=scales::rescale(c(0,1,5,10,50,1000,100000)))+
#   ylab("motif-dependent piRNA loci")+xlab("precursor length")+
#     ggtitle(paste(names_agg[i],bin,sep="-"))+
#   ggtitle(paste(names_agg[i],bin,sep="-")))
#   
#   ggsave(paste(paste(names_agg[i],bin,sep="-"),"_numreads.pdf",sep=""))

#   }


   }
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image